import os
import shutil

# File types (matched by filename suffix) that should be collected.
EXTENSIONS = [
    '.pdf',
    '.md',
    '.txt',
    '.doc',
    '.docx',
]

# Root directory that is searched for files -- change this to your own path.
SOURCE_DIR = '/Users/aresen/Downloads/00客服项目支撑/3知识库相关/集团制度/集团制度文档/增量预训练'
# Directory that receives the collected copies.  The suffix is appended to
# the path string without a separator, so this names a sibling of SOURCE_DIR
# (same parent, folder name ending in "huizong"), not a subdirectory of it.
TARGET_DIR = f"{SOURCE_DIR}huizong"

def _unique_destination(target_dir, fname):
    """Return a path inside *target_dir* for *fname* that does not exist yet.

    If ``fname`` is already taken, ``_1``, ``_2``, ... is inserted before the
    extension until a free name is found.
    """
    base, ext = os.path.splitext(fname)
    dst_path = os.path.join(target_dir, fname)
    i = 1
    while os.path.exists(dst_path):
        dst_path = os.path.join(target_dir, f"{base}_{i}{ext}")
        i += 1
    return dst_path


def collect_files(source_dir, target_dir, extensions):
    """Copy every matching file below *source_dir* into the flat *target_dir*.

    Args:
        source_dir: Root of the directory tree to search (walked recursively).
        target_dir: Directory receiving the copies; created if missing.
        extensions: Iterable of filename suffixes such as ``'.pdf'``.
            Matching is case-insensitive.

    Returns:
        A ``(file_count, total_size)`` tuple: the number of files copied and
        the combined size of the copies in bytes.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(target_dir, exist_ok=True)

    # str.endswith accepts a tuple, so one call tests all suffixes.
    suffixes = tuple(ext.lower() for ext in extensions)
    target_abs = os.path.abspath(target_dir)

    file_count = 0
    total_size = 0

    for dirpath, dirnames, filenames in os.walk(source_dir):
        # Prune the target directory from the walk (in-place edit of dirnames
        # is honoured by top-down os.walk); otherwise a target located inside
        # the source tree would have its fresh copies collected a second time.
        dirnames[:] = [
            d for d in dirnames
            if os.path.abspath(os.path.join(dirpath, d)) != target_abs
        ]
        # Files that already live in the target directory need no copy.
        if os.path.abspath(dirpath) == target_abs:
            continue

        for fname in filenames:
            if not fname.lower().endswith(suffixes):
                continue
            src_path = os.path.join(dirpath, fname)
            # On a name clash a numeric suffix is added automatically.
            dst_path = _unique_destination(target_dir, fname)
            shutil.copy2(src_path, dst_path)
            file_count += 1
            total_size += os.path.getsize(dst_path)

    return file_count, total_size


def main():
    """Collect the configured file types and print a short summary."""
    file_count, total_size = collect_files(SOURCE_DIR, TARGET_DIR, EXTENSIONS)
    print(f"文件总数: {file_count}")
    print(f"文件总大小: {total_size / 1024 / 1024:.2f} MB")


if __name__ == "__main__":
    main()